##############################
# Observation-based learning #
##############################
"""
Observation-based network learning model type.
"""
mutable struct NetworkLearnerObs{T,U,S,V,
				    R<:Vector{<:AbstractRelationalLearner},
				    C<:AbstractCollectiveInferer,
				    A<:Vector{<:AbstractAdjacency},
				    L<:Union{Void, MLLabelUtils.LabelEncoding},
				    OD<:LearnBase.ObsDimension} <: AbstractNetworkLearner 			 
	Ml::T											# local model
	fl_exec::U										# local model execution function
	Mr::S											# relational model
	fr_exec::V										# relational model execution function
	RL::R											# relational learner
	Ci::C											# collective inferer	
	Adj::A											# adjacency information
	use_local_data::Bool									# whether to use local data
	target_enc::L										# target encoding
	size_in::Int										# expected input dimensionality
	size_out::Int										# expected output dimensionality
	obsdim::OD										# observation dimension
end



# Printers
Base.show(io::IO, m::NetworkLearnerObs) = begin 
	println(io,"NetworkLearner, I/O size $(m.size_in)×$(m.size_out), observation based")
	print(io,"`- local model: "); println(io, m.Ml)
	print(io,"`- relational model: "); println(io, m.Mr)
	print(io,"`- relational learners: "); println(io, m.RL)
	print(io,"`- collective inferer: "); println(io, m.Ci)
	print(io,"`- adjacency: "); println(io, m.Adj)	
	println(io,"`- use local data: $(m.use_local_data)");
	println(io,"`- observations are: $(m.obsdim == ObsDim.Constant{1} ? "rows" : "columns")");
	println(io,"`- targets: $(m.target_enc isa Void ? "not encoded" : "encoded")");
end



####################
# Training methods #
####################
"""
	fit(::Type{NetworkLearnerObs}, X, y, Adj, fl_train, fl_exec, fr_train, fr_exec [;kwargs])

Training method for the observation-based network learning framework.

# Arguments
  * `X::AbstractMatrix` training data (used by `fl_train`, `fl_exec`; if `use_local_data==true`, it is also used by `fr_train`)
  * `Y::AbstractAray` data targets (used by `fl_train`, `fr_train`)
  * `Adj::Vector{AbstractAdjacency}` a vector containing the observation relational structures (adjacency objects)
  * `fl_train` local model training 'function'; can be anything that supports the call `fl_train((X,y))`
  * `fl_exec` local model prediction 'function'; can be anything that supports the call `fl_exec(Ml,X)` where `Ml = fl_train((X,y))`
  * `fr_train` relational model training `function`; can be anything that suports the call `fr_train((Xr,y))` 
  * `fr_exec` relational model prediction `function`; can be anything that suports the call `fr_exec(Mr,Xr)` where `Mr = fr_train((Xr,y))`
  and `Xr` is a dataset of relational variables generated by the relational learner using the results of the local model prediction
  function and adjacency structures.

# Keyword arguments
  * `priors::Vector{Float64}` class priors (if applicable)
  * `learner::Symbol` relational learner (i.e. variable generator); available options `:rn`, `:wrn`, `:bayesrn` and `:cdrn` (default `:wrn`)
  * `inference::Symbol` collective inference method; available options `:rl`, `:ic` and `:gs` (default `:rl`)
  * `normalize::Bool` whether to normalize the relational variables per-observation to the L1 norm (default `true`)
  * `use_local_data::Bool` whether the relational model should use the local data provided (i.e. in `X`) (default `true`)
  * `f_targets::Function` function that extracts targets from estimates generated by the local/relational models 
  (default `f_targets = x->MLDataPattern.targets(indmax,x)`)
  * `obsdim::Int` observation dimension (default `2`)
  * `tol::Float64` maximum admissible mean estimate error for collective inference convergence (default `1e-6`)
  * `κ::Float64` relaxation labeling starting constant, used if `learner == :rl` (default `1.0`)
  * `α::Float64` relaxation labeling decay constant, used if `learner == :rl` (default `0.99`)
  * `maxiter::Int` maximum number of iterations for collective inference (default `100`)
  * `bratio::Float64` percentage of iterations i.e. `maxiter` used for Gibbs sampling burn-in (default `0.1`)
"""
function fit(::Type{NetworkLearnerObs}, X::AbstractMatrix, y::AbstractArray, Adj::A where A<:Vector{<:AbstractAdjacency}, 
		fl_train, fl_exec, fr_train, fr_exec; 
		priors::Vector{Float64}=getpriors(y), learner::Symbol=:wvrn, inference::Symbol=:rl, 
		normalize::Bool=true, use_local_data::Bool=true, f_targets::Function=x->targets(indmax,x), 
		obsdim::Int = 2, tol::Float64=1e-6, κ::Float64=1.0, α::Float64=0.99, maxiter::Int=100, bratio::Float64=0.1) 

	# Parse, transform input arguments
	κ = clamp(κ, 1e-6, 1.0)
	α = clamp(α, 1e-6, 1.0-1e-6)
	tol = clamp(tol, 0.0, Inf)
	maxiter = ifelse(maxiter<=0, 1, maxiter)
	bratio = clamp(bratio, 1e-6, 1.0-1e-6)
	
	@assert all((priors.>=0.0) .& (priors .<=1.0)) "All priors have to be between 0.0 and 1.0."
	@assert obsdim in [1,2] "Observation dimension can have only two values 1 (row-major) or 2 (column-major)."
	
	# Parse relational learner argument and generate relational learner type
	if learner == :rn
		Rl = SimpleRN
	elseif learner == :wrn
		Rl = WeightedRN
	elseif learner == :cdrn 
		Rl = ClassDistributionRN
	elseif learner == :bayesrn
		Rl = BayesRN
	else
		@print_verbose 1 "Unknown relational learner. Defaulting to :wrn."
		Rl = WeightedRN
	end

	# Parse collective inference argument and generate collective inference objects
	if inference == :rl
		Ci = RelaxationLabelingInferer(maxiter, tol, f_targets, κ, α)
	elseif inference == :ic
		Ci = IterativeClassificationInferer(maxiter, tol, f_targets)
	elseif inference == :gs
		Ci = GibbsSamplingInferer(maxiter, tol, f_targets, ceil(Int, maxiter*bratio))
	else
		@print_verbose 1 "Unknown collective inferer. Defaulting to :rl."
		Ci = RelaxationLabelingInferer(maxiter, tol, f_targets, κ, α)
	end
	
	fit(NetworkLearnerObs, X, y, Adj, Rl, Ci, fl_train, fl_exec, fr_train, fr_exec; 
		priors=priors, normalize=normalize, use_local_data=use_local_data, 
		obsdim=ObsDim.Constant{obsdim}())
end


"""
Training method for the network learning framework. This method should not be called directly.
"""
function fit(::Type{NetworkLearnerObs}, X::T, y::S, Adj::A, Rl::R, Ci::C, fl_train::U, fl_exec::U2, fr_train::U3, fr_exec::U4; 
		priors::Vector{Float64}=getpriors(y), normalize::Bool=true, use_local_data::Bool=true, 
		obsdim::LearnBase.ObsDimension=ObsDim.Constant{2}()) where {
			T<:AbstractMatrix, 
			S<:AbstractArray, 
			A<:Vector{<:AbstractAdjacency}, 
			R<:Type{<:AbstractRelationalLearner}, 
			C<:AbstractCollectiveInferer, 
			U, U2, U3, U4 
		}
	
	# Step 0: pre-process input arguments and retrieve sizes
	size_in = nvars(X,obsdim)								# number of local variables
	size_out = get_size_out(y)								# number of relational variables / adjacency
	n = nobs(X,obsdim)									# number of observations
	m = length(Adj) * size_out								# number of relational variables

	@assert size_out == length(priors) "Found $c classes, the priors indicate $(length(priors))."
	
	# Pre-allocate relational variables array	
	if use_local_data									# Local observation variable data is used
		Xr = matrix_prealloc(n, size_in+m, obsdim, 0.0)
		_Xr = datasubset(Xr, 1:size_in, oppdim(obsdim))
		_Xr[:] = X
		offset = size_in					
	else											# Only relational variables are used
		Xr = matrix_prealloc(n, m, obsdim, 0.0)
		offset = 0
	end
	
	# Encode targets
	(t_enc, yₑ) = encode_targets(y)


	# Step 1: train and execute local model
	@print_verbose 2 "Training and applying local model ..."
	Dl = (X,yₑ)
	Ml = fl_train(Dl); 
	Xl = fl_exec(Ml,X);
	

	# Step 2: Get relational variables by training and executing the relational learner 
	@print_verbose 2 "Calculating relational variables ..."
	RL = [fit(Rl, Aᵢ, Xl, yₑ; obsdim=obsdim, priors=priors, normalize=normalize) 
       		for Aᵢ in Adj]									# Train relational learners				

	Xrᵢ = matrix_prealloc(n, size_out, obsdim, 0.0)						# Initialize temporary storage	
	for (i,(RLᵢ,Aᵢ)) in enumerate(zip(RL,Adj))		
		
		# Apply relational learner
		transform!(Xrᵢ, RLᵢ, Aᵢ, Xl, yₑ)

		# Update relational data output		
		_Xr = datasubset(Xr, offset+(i-1)*size_out+1:offset+i*size_out, oppdim(obsdim))
		_Xr[:] = Xrᵢ									
	end
	

	# Step 3 : train relational model 
	@print_verbose 2 "Training relational model ..."
	Mr = fr_train((Xr,yₑ))

	# Step 4: remove adjacency data 
	Adj_s = AbstractAdjacency[];
	for i in 1:length(Adj)
		push!(Adj_s, strip_adjacency(Adj[i]))	
	end


	# Step 5: return network learner 
	@print_verbose 2 "Done."
	return NetworkLearnerObs(Ml, fl_exec, Mr, fr_exec, RL, Ci, Adj_s, use_local_data, t_enc, size_in, size_out, obsdim)
end


#####################
# Execution methods #
#####################
"""
Prediction method for the network learning framework.
"""
function predict(model::M, X::T, update::BitVector=trues(nobs(X,model.obsdim))) where {M<:NetworkLearnerObs, T<:AbstractMatrix}
	Xo = matrix_prealloc(nobs(X,model.obsdim), model.size_out, model.obsdim, 0.0)
	predict!(Xo, model, X, update)
	return Xo
end

"""
In-place prediction method for the network learning framework.
"""
function predict!(Xo::S, model::M, X::T, update::BitVector=trues(nobs(X,model.obsdim))) where {M<:NetworkLearnerObs, T<:AbstractMatrix, S<:AbstractMatrix}
	
	# Step 0: Make initializations and pre-allocations 	
	obsdim = model.obsdim									# observation dimension
	m = nvars(X,obsdim)									# number of input variables
	n = nobs(X,obsdim)									# number of observations
	l = length(model.Adj)									# number of adjacencies in the model

	@assert model.size_in == m "Expected input dimensionality $(model.size_in), got $m."
	@assert (nvars(Xo,obsdim),nobs(Xo,obsdim)) == (model.size_out, n) "Output dataset size does not match expected size ($(model.size_out) variables×$n observations)."
	
	if model.use_local_data									# Pre-allocate relational dataset based on local data use
		Xr = matrix_prealloc(n, model.size_out*l+m, obsdim, 0.0)
		_Xr = datasubset(Xr, 1:m, oppdim(obsdim))
		_Xr[:] = X									#  - initialize (local) dimensions	
		offset = m
	else											# Skip local dataset dimensions
		Xr = matrix_prealloc(n, model.size_out*l, obsdim, 0.0)
		offset = 0
	end


	# Step 1: Apply local model, initialize output
	@print_verbose 2 "Executing local model ..."
	Xl = model.fl_exec(model.Ml, getobs(datasubset(X, update, obsdim)))
	@assert nvars(Xo,obsdim) == nvars(Xl,obsdim) "Local model outputs $(nvars(Xl,obsdim)) estimates/obs, the output indicates $(nvars(Xo,obsdim))." 	
	_Xo = datasubset(Xo, update, obsdim);
	_Xo[:] = Xl 

	# Step 2: Apply collective inference
	@print_verbose 2 "Collective inference ..."
	transform!(Xo, model.Ci, model.obsdim, model.Mr, model.fr_exec, model.RL, model.Adj, offset, Xr, update)	
	

	# Step 3: Return output estimates
	@print_verbose 2 "Done."
	return Xo
end



# It may be necessary to add adjacency information to the model, regarding the test data
"""
	add_adjacency!(model, Av)

Function that adds or, creates and then adds, adjacency objects contained in a vector `Av` to a network 
learning `model`. The method is used in 'ouf-of-graph' learning i.e. the training data is not used in 
the predictions for new samples. In such circumstances, the adjacency structures - graphs, matrices etc. 
pertinent to the new observations have to be created either using the same functions used in training or, 
separately supplied, so that relational variables can be created.
"""
function add_adjacency!(model::M, Av::Vector{T}) where {M<:NetworkLearnerObs, T<:AbstractAdjacency}
	@assert length(Av) == length(model.Adj) "New adjacency vector must have a length of $(length(model.Adj))."
	model.Adj = Av				
end
	
function add_adjacency!(model::M, Av::Vector{T}) where {M<:NetworkLearnerObs, T}
	@assert length(Av) == length(model.Adj) "Adjacency data vector must have a length of $(length(model.Adj))."
	model.Adj = adjacency.(Av)
end
